import re
import os


class Parse:
    """Extract regex-matched fragments from a saved HTML page into a text file.

    The page is read from ``html_path/html_name``; every captured fragment is
    written on its own line to a ``.txt`` file under ``save_path``.
    """

    # Patterns applied in order; each has exactly one capture group.
    # 1) whatever follows ``title=`` (quotes included) up to the next ``>``
    #    in the tag carrying class "firstTitle" and id "bw1";
    # 2) the text following ``content">`` up to the next ``<`` (``\u003C``).
    _PATTERNS = (
        r'class=\"firstTitle\" id=\"bw1\" title=(.*?)>',
        r'content\">(.*?)\u003C',
    )

    def __init__(self, html_path="./html", html_name="第3页.html",
                 save_path="./data"):
        """Configure where the page is read from and where results are saved.

        Args:
            html_path: Directory containing the HTML file.
            html_name: File name of the HTML page inside ``html_path``.
            save_path: Directory that receives the extracted ``.txt`` file.
        """
        self.html_path = html_path
        self.html_name = html_name
        self.save_path = save_path

    def get_html(self) -> str:
        """Return the full text of ``html_path/html_name`` decoded as UTF-8."""
        with open(os.path.join(self.html_path, self.html_name), "r",
                  encoding="utf-8") as f:
            return f.read()

    def parse_html(self, name: str) -> None:
        """Write every pattern match found in the page to a text file.

        The input page is always the one configured on the instance
        (``html_name``); ``name`` only determines the output file name, which
        is the part of ``name`` before its first ``.`` plus ``.txt``.

        Args:
            name: Name used to derive the output file name.
        """
        new_name = name.split(".")[0] + ".txt"
        # Read the page before touching the output so that a missing input
        # does not leave an empty result file behind.
        content = self.get_html()

        # Make sure the destination directory exists before writing.
        if self.save_path:
            os.makedirs(self.save_path, exist_ok=True)

        # Open the output once, outside the pattern loop: reopening it in "w"
        # mode per pattern would truncate the matches of earlier patterns.
        with open(os.path.join(self.save_path, new_name), "w",
                  encoding="utf-8") as f:
            for pattern in self._PATTERNS:
                # re.findall() returns every match of the pattern's group;
                # DOTALL lets a captured fragment span multiple lines.
                for match in re.findall(pattern, content, re.DOTALL):
                    f.write(match + "\n")
            

    
if __name__ == "__main__":
    # Script entry point: extract the configured page and save the result
    # under a name derived from "第三页.html".
    parser = Parse()
    output_source_name = "第三页.html"
    parser.parse_html(output_source_name)
